import os
from bs4 import BeautifulSoup
from openpyxl import Workbook

# Directory scanned for saved .html pages, and the path of the workbook
# that the extracted rows are written to.
dest_dir = "D:\\excel\\"
excel_file_path = "D:\\excel\\douban_movies.xlsx"

# Start from a fresh workbook and write into its active worksheet.
wb = Workbook()
ws = wb.active

# Header row: one column per field extracted for each movie.
headers = [
    'Title',
    'Rating',
    'Comment_Num',
    'Directors',
    'Actors',
    'Release_Date',
    'Country',
    'Genre',
    'Pic_Link',
]
ws.append(headers)

def _split_credits(credits_line):
    """Split the credits line into ``(directors, actors)``.

    The line is expected to look like ``导演: <names> 主演: <names>``.
    When the ``主演:`` marker is absent the whole line is treated as the
    directors part and ``actors`` is an empty string.
    """
    if '主演:' in credits_line:
        directors, actors = credits_line.split('主演:', 1)
    else:
        directors, actors = credits_line, ""
    return directors.replace('导演: ', '').strip(), actors.strip()


def _split_release_info(release_line):
    """Split a ``date / country / genre`` line into exactly three fields.

    Splitting from the right keeps the last two fields as country and
    genre, and leaves any extra ``/`` inside the first field (this assumes
    extra fields are additional release dates; verify against real pages).
    When fewer than three fields are present they are assigned in order and
    the missing trailing ones are returned as empty strings, so the row is
    still written instead of raising on unpacking.
    """
    parts = [part.strip() for part in release_line.rsplit('/', 2)]
    parts.extend([""] * (3 - len(parts)))
    return parts[0], parts[1], parts[2]


def _parse_movie(movie):
    """Build one worksheet row from a single ``<li>`` movie element.

    Returns a list in header order: title, rating, comment count,
    directors, actors, release date, country, genre, picture link.

    Raises:
        AttributeError: an expected tag is missing (a lookup returned None).
        IndexError: the rating block contains no ``<span>`` elements.
    """
    title = movie.find('div', class_='hd').find('span', class_='title').get_text()

    # Look the body and its first child <div> up once; rating and comment
    # count are both read from that same <div>.
    body = movie.find('div', class_='bd')
    stats = body.find('div')
    rating_num = stats.find('span', class_='rating_num').get_text()
    comment_num = stats.find_all('span')[-1].get_text()

    # The first <p> holds the credits on its first non-blank line and the
    # release information on the second one.
    info = body.find('p').get_text().strip()
    info_lines = [line.strip() for line in info.split('\n') if line.strip()]
    credits_line = info_lines[0] if info_lines else ""
    release_line = info_lines[1] if len(info_lines) > 1 else ""

    directors, actors = _split_credits(credits_line)
    release_date, country, genre = _split_release_info(release_line)

    pic = movie.find('div', class_='item').find('div', class_='pic').find('a').find('img').get('src')

    return [title, rating_num, comment_num, directors, actors, release_date, country, genre, pic]


# Sort the listing so the row order does not depend on the arbitrary order
# in which os.listdir returns directory entries.
for html_file in sorted(os.listdir(dest_dir)):
    if not html_file.endswith('.html'):
        continue
    # NOTE: str.title() title-cases the name; this is progress output only.
    print(html_file.title())
    try:
        # Read the whole file first so the handle is closed before parsing.
        with open(os.path.join(dest_dir, html_file), "r", encoding="utf-8") as f:
            html = f.read()

        soup = BeautifulSoup(html, 'lxml')
        movie_list = soup.find('ol', class_='grid_view')
        if movie_list is None:
            # Report the real cause instead of an opaque NoneType error.
            print(f"处理文件 {html_file} 时出现错误: 未找到电影列表 (ol.grid_view)")
            continue

        for index, movie in enumerate(movie_list.find_all('li'), start=1):
            # Isolate failures per movie: one malformed entry must not
            # discard the remaining entries of the same file.
            try:
                row = _parse_movie(movie)
            except (AttributeError, IndexError) as e:
                print(f"处理文件 {html_file} 第 {index} 个条目时出现错误，已跳过: {e}")
                continue
            ws.append(row)
    except FileNotFoundError:
        print(f"文件 {html_file} 未找到。")
    except Exception as e:
        # Top-level boundary for this file: report and move on to the next.
        print(f"处理文件 {html_file} 时出现错误: {e}")

# Save the workbook
wb.save(excel_file_path)
